home *** CD-ROM | disk | FTP | other *** search
/ ftp.mactech.com 2010 / ftp.mactech.com.tar / ftp.mactech.com / machack / Hacks97 / NewsTicker.sit / NewsTicker / source code / Extractors / CNNExtractor.cp < prev    next >
Text File  |  1997-06-19  |  4KB  |  150 lines

  1. /*------------------------------------------------------------------------------
  2. #
  3. #    NewsTicker, my Hack for 1997
  4. #
  5. #    CNNExtractor.cp   -    Derived from HTMLExtractor, we get passed the tokens
  6. #                            and try to recognize headlines out of it.  We parse
  7. #                            the page "www.cnn.com", the news page of cnn
  8. #
  9. ------------------------------------------------------------------------------*/
  10.  
  11. #include "TickerGlobals.h"
  12.  
  13. #include "CNNExtractor.h"
  14. #include "HTMLExtractor.h"
  15.  
  16.  
  // Refresh schedule for the CNN feed.

  // Host whose front page is fetched and parsed.
  #define kCNNAddress "www.cnn.com"

  // Refresh interval: 20 minutes, expressed in seconds.
  #define kCNNPeriod  (20 * 60)

  // Earliest time (same clock as GetDateTime) at which the page should be
  // fetched again.  Written by the CNNExtractor constructor and read by
  // MustReloadCNN; stays 0 until the first extractor has been created.
  long gCNNNextTime = 0;
  22.  
  23. class CNNExtractor: public HTMLExtractor
  24. {
  25.     protected:
  26.         enum    CNNParser    {    kncParsing,
  27.                                 //text headlines are <a>headline</a>
  28.                                 kncHasLink,
  29.                                 //Some big stories are <h2>text</h2>(other><a>
  30.                                 kncHasHeader, kncHasNotHeader    };
  31.                                 
  32.         CNNParser    mfCurrentState;
  33.         Str255        mfTheURL;
  34.         Str255        mfTheSubject;
  35.      
  36.     public:
  37.                         CNNExtractor(sMyDataPtr theDataPtr);
  38.         virtual        ~CNNExtractor        (void){ }
  39.         
  40.         virtual void    HandleToken(char* string, short numchars, Boolean isCommand);
  41. };
  42.  
  43. //
  44. // We just parse the entries to find the element
  45. //
  46. CNNExtractor::CNNExtractor(sMyDataPtr theDataPtr)
  47.         :HTMLExtractor(kCNNAddress, 1003, theDataPtr)
  48. {
  49.     unsigned long now;
  50.     
  51.     mfCurrentState = kncParsing;    //just waiting for our thing to come through
  52.     
  53.     GetDateTime(&now);
  54.     gCNNNextTime = now + kCNNPeriod;    //refresh the news every 20 minutes
  55. }
  56.  
  57. void CNNExtractor::HandleToken(char* string, short numchars, Boolean isCommand)
  58. {
  59.     if (isCommand)
  60.     {
  61.         switch (mfCurrentState)
  62.         {
  63.             case kncParsing:                        //from nothing, we want H2 or A
  64.                 if (MyCompareStr(string, "<H2>"))
  65.                 {
  66.                     mfCurrentState = kncHasHeader;
  67.                     mfTheSubject[0] = 0;
  68.                     mfTheURL[0] = 0;
  69.                 }
  70.                 else if (MyCompareStr(string, "<A "))
  71.                 {
  72.                     if (HTMLExtractor::ParseGoodURL(string+2, mfTheURL))
  73.                     {
  74.                         mfCurrentState = kncHasLink;
  75.                         mfTheSubject[0] = 0;
  76.                     }
  77.                     else mfCurrentState = kncParsing;
  78.                 }
  79.                 break;
  80.             case kncHasLink:                        //if we hit another tage when in a has link,
  81.                 mfCurrentState = kncParsing;        //abort
  82.                 break;
  83.             case kncHasHeader:                    //for this, only waiting for </H2>
  84.                 if (MyCompareStr(string, "</H2>"))
  85.                     mfCurrentState = kncHasNotHeader;
  86.                     else mfCurrentState = kncParsing;
  87.                 break;
  88.             case kncHasNotHeader:                //for this, waiting for <a>
  89.                 if (MyCompareStr(string, "<A "))
  90.                 {
  91.                     if (HTMLExtractor::ParseGoodURL(string+2, mfTheURL))
  92.                     {
  93.                         AddEntry(mfTheSubject, mfTheURL);
  94.                         mfCurrentState = kncParsing;
  95.                     }
  96.                     else mfCurrentState = kncParsing;
  97.                 }
  98.                 break;
  99.         }
  100.     }
  101.     else
  102.     {
  103.         if ((mfCurrentState==kncHasHeader)    //OK, get got a headline!
  104.             ||(mfCurrentState==kncHasLink))
  105.         {
  106.             if (numchars>255)
  107.                 numchars = 255;
  108.             mfTheSubject[0] = numchars;
  109.             BlockMove(string, &mfTheSubject[1], numchars);
  110.             
  111.             //skip some extraneous CNN stuff
  112.             if (EqualString(mfTheSubject, "\pIMPACT", false, false))
  113.                 mfCurrentState = kncParsing;
  114.             if (EqualString(mfTheSubject, "\pF U L L   S T O R Y", false, false))
  115.                 mfCurrentState = kncParsing;
  116.             if (EqualString(mfTheSubject, "\pTEXT - ONLY VERSION", false, false))
  117.                 mfCurrentState = kncParsing;
  118.             
  119.             if (mfCurrentState==kncHasLink)
  120.             {
  121.                 //Add the entry
  122.                 AddEntry(mfTheSubject, mfTheURL);
  123.                 mfCurrentState = kncParsing;
  124.             }
  125.         }
  126.     }
  127. }
  128.  
  129. void LoadCNN(sMyDataPtr gGlobalsPtr)
  130. {
  131.     CNNExtractor* theparser = new CNNExtractor(gGlobalsPtr);
  132.     
  133.     theparser->ReadEntries();
  134.     delete theparser;
  135.         
  136.     InitCursor();
  137. }
  138.  
  139. // This reloads us if necessary
  140. Boolean MustReloadCNN(sMyDataPtr    gGlobalsPtr)
  141. {
  142.     unsigned long now;
  143.     
  144.     GetDateTime(&now);
  145.     
  146.     if (now<gCNNNextTime)    //time to check yet?
  147.         return false;
  148.     
  149.     return true;        //always recheck on the time
  150. }